
#### Time Series Data Analysis for paper "Less reliable media drive interest in anti-vaccine information"
#### Last Modified Date: 18 May 2023
##### This file contains the time series analysis (ADF test for stationarity, fitting the VAR model, Granger causality tests, IRF plots) on the final time series data used in the paper. The VAR model uses the data in levels. We run this analysis for anti-vaccine ("antivaxx") terms across different platforms and media outlets. The Google Trends variable was generated using Google Trends and is restricted to queries from the US. For further questions, please reach out to the authors at ss5910@columbia.edu.


#importing relevant packages
import pandas
import pandas as pd
#check and install version 0.24 of pandas if needed
#pip install --upgrade pandas==0.24
import glob
import numpy as np
import seaborn as sns
import warnings
import matplotlib.pyplot as plt
# Enable inline figures only when running under IPython/Jupyter. Under a plain
# Python interpreter `get_ipython` is not defined, so skip the magic instead of
# aborting the whole script with a NameError.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy import stats
from matplotlib import pyplot
import collections
warnings.simplefilter(action='ignore', category=FutureWarning)
from sklearn.exceptions import DataConversionWarning
warnings.filterwarnings(action='ignore', category=DataConversionWarning)
import datetime
from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import adfuller
#ignore warnings
warnings.filterwarnings("ignore")

#import time series dataset
df_raw=pd.read_csv("timeseries_data_antivaxx.csv")
# Work on a copy so that df_raw keeps the file contents exactly as read;
# a plain assignment would only alias the same DataFrame.
df_new6=df_raw.copy()
#convert to date time and sort chronologically: the ADF tests and the VAR model
#below treat row order as time order, so the ordering is made explicit here.
#A stable sort keeps the file order for rows that share a date, and is a no-op
#when the CSV is already in date order.
df_new6['date']=pd.to_datetime(df_new6['date'])
df_new6=df_new6.sort_values('date', kind='mergesort').reset_index(drop=True)
#keep plain datetime.date objects in the column, as before
df_new6['date']=df_new6['date'].dt.date

#Run ADF test for stationarity of data
#The Augmented Dickey-Fuller (ADF) test is a unit root test: its null hypothesis
#is that the series has a unit root (is non-stationary). The more negative the
#ADF statistic is relative to the critical values, the stronger the case for
#rejecting the null, i.e. for treating the series as stationary.
#Example from the authors' run: for the first series the ADF statistic (-8.7) is
#below the critical values (-3.5, -2.8, -2.5 at the 1%, 5% and 10% levels), so
#the null is rejected and the series is treated as stationary.
#Each result is the tuple returned by adfuller; a series is considered
#non-stationary if the second value (the p-value) is > 0.05.
#Results are not printed here (printing was mostly used in the Jupyter
#notebook); use e.g. print(adf1_multivar) to inspect any of them.


def _adf(column):
    """Run the ADF test (maxlag=10) on one column of df_new6."""
    return adfuller(df_new6[column], maxlag=10)


#Bing search series
adf1_multivar = _adf('norm_search_antivaxx')

#Media series by outlet group
adf3_multivar = _adf('norm_media_low')
adf4_multivar = _adf('norm_media_mid')
adf5_multivar = _adf('norm_media_high')
adf6_multivar = _adf('norm_media_U')

#Google Trends series
#NOTE(review): the original inline comment marks this series as "non-stationary",
#while the summary below names norm_media_mid as the only non-stationary
#variable, and norm_google_antivaxx is still used in the VAR - confirm which holds.
adf9_multivar = _adf('norm_google_antivaxx')

#Twitter series and unreliable-media series
adf10_multivar = _adf('norm_twitter_antivaxx')
adf11_multivar = _adf('norm_media_unreliable')

#Authors' summary: 1 variable not stationary: norm_media_mid
#Note- the non-stationary variable is excluded from the analysis, as this makes
#the VAR results more robust

#Fit the VAR model
#Series that enter the VAR (norm_media_mid is not part of this list)
var_columns = [
    'norm_search_antivaxx',
    'norm_media_high',
    'norm_media_low',
    'norm_media_U',
    'norm_media_unreliable',
    'norm_google_antivaxx',
    'norm_twitter_antivaxx',
]
dates = df_new6['date']
mdata = df_new6[var_columns]

#Index the observations by date and drop rows with any missing value
#(uncomment to inspect: print(mdata.shape); print(mdata.head()))
mdata.index = pd.DatetimeIndex(dates)
mdata = mdata.dropna()

#mdata has no missing values left at this point; data is a separate copy of it
data = mdata.copy()
#data_desc=data.describe()

#make a VAR model
model = VAR(data)

#Lag order is chosen by AIC among lags up to 7 (lowest AIC >>> better model)
#Fixed-lag alternative: results = model.fit(7)
results = model.fit(maxlags=7, ic='aic')
#Uncomment to inspect the fitted model: print(results.summary())

#Plot results after fitting VAR model
results.plot()
#plt.show(block=False)

#ACF plot for residuals
#If autocorr values are close to 0, then values between consecutive observations are not correlated with one another.
#Inversely, autocorr values close to 1 or -1 indicate that there exists strong +ve or -ve correlations between consecutive observations, respectively.
#The plot is created once and the same figure is saved; the earlier code called
#plot_acorr() twice, which left an extra, unused copy of the figure open.
acf=results.plot_acorr()
#plt.show(block=False)
#NOTE(review): the output filename mentions "weapon" although this script analyses
#antivaxx terms; it is kept unchanged so existing outputs keep their name - confirm.
acf.savefig('acf_weapon_2021_jan2023.jpg', dpi=400)


#Run and print Granger causality test results
#Media and Search
#print("\033[1m" + "Results for granger causality test:"+ "\033[0m","\n","\n")
#print(results.test_causality('norm_media_high', 'norm_search_antivaxx', kind='f').summary())
#print("\n","\n")
#print(results.test_causality('norm_search_antivaxx', 'norm_media_high', kind='f').summary())

#print("\033[1m" + "Results for granger causality test:"+ "\033[0m","\n","\n")
#print(results.test_causality('norm_media_mid', 'norm_search_antivaxx', kind='f').summary())
#print("\n","\n")
#print(results.test_causality('norm_search_antivaxx', 'norm_media_mid', kind='f').summary())


#print("\033[1m" + "Results for granger causality test:"+ "\033[0m","\n","\n")
#print(results.test_causality('norm_media_low', 'norm_search_antivaxx', kind='f').summary())
#print("\n","\n")
#print(results.test_causality('norm_search_antivaxx', 'norm_media_low', kind='f').summary())



# print("\n","\n")
# print("\033[1m" + "Results for granger causality test:"+ "\033[0m","\n","\n")
# print(results.test_causality('norm_media_U', 'norm_search_antivaxx', kind='f').summary())
# print("\n","\n")
# print(results.test_causality('norm_search_antivaxx', 'norm_media_U', kind='f').summary())
# print("\n","\n")

# print("\n","\n")
# print("\033[1m" + "Results for granger causality test:"+ "\033[0m","\n","\n")
# print(results.test_causality('norm_media_unreliable', 'norm_search_antivaxx', kind='f').summary())
# print("\n","\n")
# print(results.test_causality('norm_search_antivaxx', 'norm_media_unreliable', kind='f').summary())
# print("\n","\n")


#Run Granger causality including google
# print("\033[1m" + "Results for granger causality test:"+ "\033[0m","\n","\n")
# print(results.test_causality('norm_media_high', 'norm_google_antivaxx', kind='f').summary())
# print("\n","\n")
# print(results.test_causality('norm_google_antivaxx', 'norm_media_high', kind='f').summary())
# print("Results for Normality test from the VAR package:","\n","\n")
#
#
#
# print("\033[1m" + "Results for granger causality test:"+ "\033[0m","\n","\n")
# print(results.test_causality('norm_media_low', 'norm_google_antivaxx', kind='f').summary())
# print("\n","\n")
# print(results.test_causality('norm_google_antivaxx', 'norm_media_low', kind='f').summary())
#
#
#
# print("\n","\n")
# print("\033[1m" + "Results for granger causality test:"+ "\033[0m","\n","\n")
# print(results.test_causality('norm_media_U', 'norm_google_antivaxx', kind='f').summary())
# print("\n","\n")
# print(results.test_causality('norm_google_antivaxx', 'norm_media_U', kind='f').summary())
# print("\n","\n")
#
#
# print("\n","\n")
# print("\033[1m" + "Results for granger causality test:"+ "\033[0m","\n","\n")
# print(results.test_causality('norm_media_unreliable', 'norm_google_antivaxx', kind='f').summary())
# print("\n","\n")
# print(results.test_causality('norm_google_antivaxx', 'norm_media_unreliable', kind='f').summary())
# print("\n","\n")

# ###Google and Bing Search
# print("\033[1m" + "Results for granger causality test:"+ "\033[0m","\n","\n")
# print(results.test_causality('norm_google_antivaxx', 'norm_search_antivaxx', kind='f').summary())
# print("\n","\n")
# print(results.test_causality('norm_search_antivaxx', 'norm_google_antivaxx', kind='f').summary())
#



#Run Granger causality including twitter

######Twitter Added (Media and Twitter)
# print("\033[1m" + "Results for granger causality test:"+ "\033[0m","\n","\n")
# print(results.test_causality('norm_media_high', 'norm_twitter_antivaxx', kind='f').summary())
# print("\n","\n")
# print(results.test_causality('norm_twitter_antivaxx', 'norm_media_high', kind='f').summary())
# print("Results for Normality test from the VAR package:","\n","\n")
#
# print("\033[1m" + "Results for granger causality test:"+ "\033[0m","\n","\n")
# #print(results.test_causality('norm_media_mid', 'norm_twitter_antivaxx', kind='f').summary())
# print("\n","\n")
# #print(results.test_causality('norm_twitter_antivaxx', 'norm_media_mid', kind='f').summary())
# print("Results for Normality test from the VAR package:","\n","\n")
#
# print("\033[1m" + "Results for granger causality test:"+ "\033[0m","\n","\n")
# print(results.test_causality('norm_media_low', 'norm_twitter_antivaxx', kind='f').summary())
# print("\n","\n")
# print(results.test_causality('norm_twitter_antivaxx', 'norm_media_low', kind='f').summary())
#
#
#
# print("\n","\n")
# print("\033[1m" + "Results for granger causality test:"+ "\033[0m","\n","\n")
# print(results.test_causality('norm_media_U', 'norm_twitter_antivaxx', kind='f').summary())
# print("\n","\n")
# print(results.test_causality('norm_twitter_antivaxx', 'norm_media_U', kind='f').summary())
# print("\n","\n")
#
# print("\n","\n")
# print("\033[1m" + "Results for granger causality test:"+ "\033[0m","\n","\n")
# print(results.test_causality('norm_media_unreliable', 'norm_twitter_antivaxx', kind='f').summary())
# print("\n","\n")
# print(results.test_causality('norm_twitter_antivaxx', 'norm_media_unreliable', kind='f').summary())
# print("\n","\n")
#
# ###Twitter and Bing Search
# print("\033[1m" + "Results for granger causality test:"+ "\033[0m","\n","\n")
# print(results.test_causality('norm_twitter_antivaxx', 'norm_search_antivaxx', kind='f').summary())
# print("\n","\n")
# print(results.test_causality('norm_search_antivaxx', 'norm_twitter_antivaxx', kind='f').summary())

# ###Twitter and Google Search
# print("\033[1m" + "Results for granger causality test:"+ "\033[0m","\n","\n")
# print(results.test_causality('norm_twitter_antivaxx', 'norm_google_antivaxx', kind='f').summary())
# print("\n","\n")
# print(results.test_causality('norm_google_antivaxx', 'norm_twitter_antivaxx', kind='f').summary())



# Producing the IRF Plots


def _orth_irf_figure(irf_analysis, impulse, response, width, height):
    """Plot one orthogonalized impulse -> response panel and resize it.

    irf_analysis is the object returned by results.irf(...); impulse and
    response are column names of the fitted VAR; width and height are the
    figure size in inches. Returns the figure produced by irf_analysis.plot.
    """
    figure = irf_analysis.plot(impulse=impulse, orth=True, response=response)
    figure.set_size_inches(width, height)
    return figure


# Impulse responses over 7 periods (irf) and over 10 periods (irf0)
irf = results.irf(7)
irf.plot(orth=True, subplot_params={'fontsize':7})
#plt.show(block=False)
irf0 = results.irf(10)
irf0.plot(orth=False)

# Full grid of non-orthogonalized responses. The figure is resized first and
# tight_layout() is applied afterwards, so the spacing is computed for the final
# 30x30 inch canvas rather than for the default size.
fig = irf.plot(orth=False)
fig.set_size_inches(30, 30)
fig.tight_layout()

# Low-reliability media <-> Twitter
fig3 = _orth_irf_figure(irf, 'norm_media_low', 'norm_twitter_antivaxx', 7, 8)
fig3_2 = _orth_irf_figure(irf, 'norm_twitter_antivaxx', 'norm_media_low', 7, 8)

# Unreliable media <-> Google
# NOTE(review): fig4 used to be built twice (first 7x8 inches, then 4x4), with
# the first figure discarded on reassignment; only the final 4x4 version is
# kept here - confirm this is the intended size.
fig4 = _orth_irf_figure(irf, 'norm_media_unreliable', 'norm_google_antivaxx', 4, 4)
fig4_2 = _orth_irf_figure(irf, 'norm_google_antivaxx', 'norm_media_unreliable', 7, 8)

# Responses of every series to a shock in each selected impulse variable
# (the repeated norm_media_U plot is issued only once).
for impulse_name in ('norm_media_low', 'norm_media_U',
                     'norm_media_unreliable', 'norm_twitter_antivaxx'):
    irf.plot(impulse=impulse_name)
